LaTeX pseudocode is located here. "English" pseudocode is located here (this was written before I knew how to use LaTeX).
Also, the implementation of kernel probability density functions will need: KDE Python Notebook (code.md approved)
For all sine waves created: make sure number of points = 1000 sampled from each wave representing data.
Success:
Hopeful:
Fail:
Basically if we're able to select the poor waves or not and the final dataset does not have the really poor waves.
Start by generating each set of data from above.
import numpy as np

# Seed NumPy's global RNG so every simulated dataset below is reproducible.
initseed = 123456789
np.random.seed(initseed)

# Per the spec above: 1000 samples per wave, evenly spaced over [0, 1].
numvals = 1000
times = np.linspace(0, 1, numvals)

# One period of the unit-amplitude base sine wave shared by every case.
sin = np.sin(2 * np.pi * times)
def gen_whitenoise(mean, std, size):
    """Draw `size` Gaussian (white-noise) samples with the given mean and std."""
    return np.random.normal(mean, std, size=size)
# --- Success Case 1: 50 identical, noise-free sine waves ---
success1 = np.column_stack([sin] * 50)

# --- Success Case 2: 50 identical sine waves; column 4 gets heavy white noise ---
success2 = np.column_stack([sin] * 50)
wn = gen_whitenoise(0, 10, numvals)
success2[:, 4] = success2[:, 4] + wn.T

# --- Success Case 3: amplitudes 1-5, ten waves each (columns grouped by
# amplitude); column 49 reuses the same noise vector `wn` drawn above ---
success3 = np.column_stack([sin * amp for amp in (1, 2, 3, 4, 5)
                            for _ in range(10)])
success3[:, 49] = success3[:, 49] + wn.T

# --- Hopeful Case 1: 40 waves; first 32 lightly noisy, last 8 swamped ---
hope1 = np.column_stack([sin] * 40)
for col in range(32):
    hope1[:, col] += gen_whitenoise(0, 0.25, numvals)
for col in range(32, 40):
    hope1[:, col] += gen_whitenoise(0, 20, numvals)

# --- Fail Case 1: 50 waves; 40 of them (the majority) carry moderate noise ---
fail1 = np.column_stack([sin] * 50)
for col in range(40):
    fail1[:, col] = fail1[:, col] + gen_whitenoise(0, 2, numvals)
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly.graph_objs import *
from plotly import tools
init_notebook_mode()
# Success 1
# Plot only the first five columns so the overlaid traces stay legible.
datasets = [Scatter(x=times, y=success1[:, i], name='Sample ' + str(i))
            for i in range(0, 5)]
data = [datasets[:]]  # kept from the original; never read afterwards
# Shared axis labels for the qualitative plots.
layout = dict(title='Success 1',
              xaxis=dict(title='Time'),
              yaxis=dict(title='Unit'))
fig = dict(data=datasets, layout=layout)
iplot(fig)
# Success 2
# First five columns only; column 4 is the noisy electrode, so it shows up here.
datasets = [Scatter(x=times, y=success2[:, i], name='Sample ' + str(i))
            for i in range(0, 5)]
data = [datasets[:]]  # kept from the original; never read afterwards
layout = dict(title='Success 2',
              xaxis=dict(title='Time'),
              yaxis=dict(title='Unit'))
fig = dict(data=datasets, layout=layout)
iplot(fig)
# Success 3
# All columns this time, so every amplitude group (and the noisy col 49) is shown.
datasets = [Scatter(x=times, y=success3[:, i], name='Sample ' + str(i))
            for i in range(0, success3.shape[1])]
data = [datasets[:]]  # kept from the original; never read afterwards
layout = dict(title='Success 3',
              xaxis=dict(title='Time'),
              yaxis=dict(title='Unit'))
fig = dict(data=datasets, layout=layout)
iplot(fig)
# Hopeful 1
# Ten columns: enough to show the lightly-noisy waves (cols 0-9 of 0-31).
datasets = [Scatter(x=times, y=hope1[:, i], name='Sample ' + str(i))
            for i in range(0, 10)]
data = [datasets[:]]  # kept from the original; never read afterwards
layout = dict(title='Hope 1',
              xaxis=dict(title='Time'),
              yaxis=dict(title='Unit'))
fig = dict(data=datasets, layout=layout)
iplot(fig)
# Fail 1
# First five columns of the mostly-noisy failure dataset.
datasets = [Scatter(x=times, y=fail1[:, i], name='Sample ' + str(i))
            for i in range(0, 5)]
data = [datasets[:]]  # kept from the original; never read afterwards
layout = dict(title='Fail 1',
              xaxis=dict(title='Time'),
              yaxis=dict(title='Unit'))
fig = dict(data=datasets, layout=layout)
iplot(fig)
# Reshape Data: Run with no reshaping
# NOTE(review): the original cell set module-level electrodes/trials/times
# placeholders to 0 here. That clobbered the `times` linspace used as the
# x-axis by every later plot, and reshape() only ever assigns its own
# locals, so the dead (and harmful) placeholder assignments are removed.
print(len(success1.shape))
def reshape(inEEG):
    """Flatten a 3-D EEG array to 2-D; pass 1-D/2-D input through unchanged.

    A (times, electrodes, trials) array is reshaped to
    (times * trials, electrodes).  Any other rank prints "fail" and
    returns None (kept from the original notebook's behavior).

    Changes from the original: the unused local electrodes/times/trials
    assignments are removed, the Python-2-only print statement is replaced
    with the cross-version call form, and the fail branch's implicit None
    is made explicit.
    """
    ndim = len(inEEG.shape)
    if ndim == 3:
        t, e, r = inEEG.shape
        return np.reshape(inEEG, (t * r, e))
    if ndim in (1, 2):
        # Already flat enough; nothing to do.
        return inEEG
    # Unsupported rank.
    print("fail")
    return None
print "Final dimensions:", reshape(success1).shape
# Reshape Data: Run with reshaping
# Add a singleton trial axis so success1 becomes 3-D: (1000, 50, 1).
success1 = np.expand_dims(success1, 2)
print len(success1.shape)
print success1.shape
# reshape() collapses the trial axis back out: (1000, 50, 1) -> (1000, 50).
print "Final dimensions:", reshape(success1).shape
success1 = reshape(success1)
# Reshape Data: run with data where reshaping actually matters
# Stack two copies along a new trial axis: (1000, 50, 2).
dummy = np.dstack([success1] * 2)
print dummy.shape
print dummy[:,:,1].shape
# NOTE(review): reshape() uses C-order np.reshape, which mixes axes rather
# than concatenating whole trials per electrode column - confirm intended.
print "Final dimensions:", reshape(dummy).shape
Kernel density probability distribution
# Now define the probability density functions tested in other notebooks
from sklearn.neighbors.kde import KernelDensity
from sklearn.grid_search import GridSearchCV
# Run the KDE!
def kdewrap(indata, kernel):
    """Fit a KDE to 1-D `indata` and return the per-sample log-densities.

    The bandwidth is chosen by 10-fold cross-validated grid search over
    [0.1, 1.0]; `kernel` names the kernel for the final fit (e.g. 'gaussian').

    NOTE(review): the original cell wrapped grid.fit in the IPython
    "%time" magic, which is a syntax error in plain Python; the timing
    wrapper is dropped and the fit is called directly.
    """
    grid = GridSearchCV(KernelDensity(),
                        {'bandwidth': np.linspace(0.1, 1.0, 30)},
                        cv=10)  # 10-fold cross-validation
    grid.fit(indata[:, None])
    # Refit with the requested kernel at the cross-validated bandwidth.
    kde = KernelDensity(kernel=kernel,
                        bandwidth=grid.best_params_["bandwidth"]).fit(indata[:, np.newaxis])
    return kde.score_samples(indata[:, np.newaxis])
Define what's inside the loop that the KDE will execute in.
# Pull a single electrode (column 0) out of the clean Success-1 data.
regelec = success1[:, 0]
#print regelec
# Per-sample log-densities from the Gaussian KDE.
probdist = kdewrap(regelec, 'gaussian')
#print probdist
# get joint prob with assumption in log-likelihood format
# leave in log-likelihood to prevent underflow
# Summing log-densities gives the log of the joint likelihood under an
# independence assumption.
jointprob = np.sum(probdist)
print jointprob
These are really all the modular testable parts of the code. Now will combine into algorithm + ending section
from scipy.stats import kurtosis
def prob_baddetec(inEEG, threshold, probfunc):
    """Flag electrodes whose joint log-likelihood is improbably low.

    Parameters
    ----------
    inEEG : ndarray
        1-D, 2-D (time x electrode), or 3-D (time x electrode x trial) data.
    threshold : float
        An electrode is "bad" when its joint log-likelihood sits at least
        this many standard deviations below the mean across electrodes.
    probfunc : callable
        Called as probfunc(column, 'gaussian'); must return per-sample
        (log-)densities for one electrode's data.

    Returns
    -------
    (badelec, kurtvec) : (list of int, ndarray)
        Indices of flagged electrodes and per-electrode kurtosis.
        Returns -1 for unsupported input rank (kept from the original).
    """
    # Flatten 3-D data to 2-D so electrodes are columns.
    if len(inEEG.shape) == 3:
        inEEG = np.reshape(inEEG, (inEEG.shape[0] * inEEG.shape[2], inEEG.shape[1]))
    elif len(inEEG.shape) == 1:
        # Fix: 1-D input used to crash on shape[1]; treat it as one electrode.
        inEEG = inEEG[:, np.newaxis]
    elif len(inEEG.shape) != 2:
        # Unsupported rank: keep the original sentinel return.
        return -1
    # Column count is unchanged by the 3-D flattening above.
    electrodes = inEEG.shape[1]
    probvec = np.zeros(electrodes)
    kurtvec = np.zeros(electrodes)
    for i in range(electrodes):
        # Joint log-likelihood of the electrode = sum of per-sample log-densities.
        probdist = probfunc(inEEG[:, i], 'gaussian')
        probvec[i] = np.sum(probdist)
        kurtvec[i] = kurtosis(inEEG[:, i])
    # Normalize against the population of electrodes.
    avg = np.mean(probvec)
    stddev = np.std(probvec)
    badelec = []
    # Fix: identical joint likelihoods give stddev == 0; the original then
    # compared nan >= threshold (False, plus a RuntimeWarning).  Flag
    # nothing explicitly instead - same result, no warning.
    if stddev == 0:
        return badelec, kurtvec
    for i in range(len(probvec)):
        # How many standard deviations *below* the mean this electrode sits.
        if ((avg - probvec[i]) / stddev) >= threshold:
            badelec.append(i)
    return badelec, kurtvec
def good_elec(inEEG, badelec):
    """Return `inEEG` with the flagged electrode columns removed."""
    # Electrodes live on axis 1 in the flattened (time x electrode) layout.
    return np.delete(inEEG, badelec, axis=1)
# Qualitative evaluation: print the expected bad-electrode list next to the
# detected one so they can be compared by eye (Python 2 print statements).
def qual_eval(badelec, expected):
    print "Expected:", expected
    print "Actual:", badelec
def qual_plot(data, title):
    """Plot every column of `data` against the module-level `times` axis."""
    # One Scatter trace per electrode/sample column.
    traces = [Scatter(x=times, y=data[:, col], name='Sample ' + str(col))
              for col in range(0, data.shape[1])]
    data = [traces[:]]  # rebinds the parameter, as in the original; unused
    # Shared axis labels, with the caller-supplied title.
    layout = dict(title=title,
                  xaxis=dict(title='Time'),
                  yaxis=dict(title='Unit'))
    fig = dict(data=traces, layout=layout)
    iplot(fig)
Just check that the correct number of bad electrodes were removed, and which ones were removed.
def quant_eval(badelec, expected):
    """Return True when the detected bad electrodes match the expected set.

    Order-insensitive comparison.  The original body referenced undefined
    names ``x``/``y`` and lowercase ``true``/``false`` (NameErrors on any
    call); this is the comparison it was clearly meant to perform.
    """
    return set(badelec) == set(expected)
# Smoke-test the detector on the amplitude-varying dataset before the sweep.
dummybad = prob_baddetec(success3, 3, kdewrap)
print dummybad
# First run on all datasets
# NOTE(review): prob_baddetec returns a (badelec, kurtvec) tuple, but the
# qual_eval/good_elec calls below treat each result as the bad-electrode
# list alone - the tuple is probably meant to be unpacked first; confirm.
s1bad = prob_baddetec(success1, 3, kdewrap)
s2bad = prob_baddetec(success2, 3, kdewrap)
s3bad = prob_baddetec(success3, 3, kdewrap)
# Hope 1 uses a looser 2-sigma threshold; its bad electrodes are subtler.
h1bad = prob_baddetec(hope1, 2, kdewrap)
f1bad = prob_baddetec(fail1, 3, kdewrap)
qual_eval(s1bad, [])
qual_plot(good_elec(success1, s1bad), "Success 1")
qual_eval(s2bad, [4])
qual_plot(good_elec(success2, s2bad), "Success 2")
qual_eval(s3bad, [49])
qual_plot(good_elec(success3, s3bad), "Success 3")
qual_eval(h1bad, range(32,40))
qual_plot(good_elec(hope1, h1bad), "Hope 1")
qual_eval(f1bad, [])
qual_plot(good_elec(fail1, f1bad), "Fail 1")
Honestly, anything gained from the quantitative evaluation was already obtained earlier.
Unfortunately, what I expected to happen didn't exactly happen. At 3 standard deviations away (which is a standard lent by what the researchers used in their own probability implementation), none of the noisy data was there. Attempting to modify the noise inputted into the data by changing the scale of which the noise was being added (ie changing stddev from 1 -> 2 -> 3) had literally no effect on the outcome. What makes sense is that maybe because the number of white noise values being used is so high, they accurately depict a normal graph, and if they depict a normal graph, the distribution that is eventually modelled by the kernel probability is essentially a normal graph. Not sure best way to deal with this problem (or should I fix simulations?) but will ask Jovo for help before moving more